module net.BurtonRadons.parse.lexer;

private import std.ctype, std.file;
private import net.BurtonRadons.parse.encoding;

/** Character type returned by the lexer's character readers (peekch, nextch); an unsigned 32-bit integer. */
alias uint dchar;

/** Format a string printf-style and return the result as a newly allocated array.
  *
  * @param format printf format string; it does not need to be NUL-terminated.
  * @return A duplicate of the formatted text.
  *
  * The text is produced by vsprintf into a fixed 8192-character local buffer,
  * so the formatted output must fit in that buffer.
  */
extern (C)
char [] fmt (char [] format, ...)
{
    char [8192] buffer;
    /* vsprintf reads a NUL-terminated C string, but a D array (for example a
       slice of a longer string) carries no terminator; append one to a copy. */
    char [] terminated = format ~ "\0";
    int length;

    /* The variadic arguments are taken to start right after the format parameter. */
    length = vsprintf (buffer, terminated, cast (va_list) (&format + 1));
    return buffer [0 .. length].dup;
}

/** A line marker; this delimits a segment of code. */
struct Marker
{
    char [] filename; /**< Source filename. */
    Lexer lexer; /**< Lexer this marker is for. */
    LexerSource source; /**< Source file this marker came from. */
    
    int line = 1; /**< Line index in the file, one-based. */
    int offset = 1; /**< Character offset from the start of the line, one-based. */
    int index = 0; /**< Character index into the source's string. */

    int endLine = 1; /**< Line index in the file of the end of the mark, one-based. */
    int endOffset = 1; /**< Line offset from the start of the end line for the end of the mark, one-based. */
    int endIndex = 0; /**< Character index into the source's string for the end of the segment. */

    /** Return a marker encapsulating both of these markers.
      * The result starts at whichever marker has the lower index and ends at
      * whichever has the higher endIndex.  If other belongs to a different
      * source, this marker is returned unchanged.
      */
    Marker contain (Marker other)
    {
        Marker result;

        if (other.source !== source)
            return *this;

        result.lexer = lexer;
        result.filename = filename;
        result.source = source;

        if (index < other.index)
        {
            result.line = line;
            result.offset = offset;
            result.index = index;
        }
        else
        {
            result.line = other.line;
            result.offset = other.offset;
            result.index = other.index;
        }

        if (endIndex < other.endIndex)
        {
            result.endLine = other.endLine;
            result.endOffset = other.endOffset;
            result.endIndex = other.endIndex;
        }
        else
        {
            result.endLine = endLine;
            result.endOffset = endOffset;
            result.endIndex = endIndex;
        }

        return result;
    }

    /** Return the string region this marker encapsulated. */
    char [] string ()
    {
        return source.string [index .. endIndex];
    }

    /** Reset the lexer to the start of the mark.
      * The source's mark becomes a copy of this marker collapsed to its start
      * (end fields equal to start fields), the read pointer is moved to index,
      * and the past-EOF read count is cleared.
      */
    void rewind ()
    {
        assert (source === lexer.source);
        source.mark = *this;
        source.mark.endLine = source.mark.line;
        source.mark.endOffset = source.mark.offset;
        source.mark.endIndex = source.mark.index;

        /* Use pointer arithmetic rather than &source.string [index]: a mark
           sitting at the very end of the input has index == length, which an
           index expression rejects as out of bounds. */
        char *base = source.string;
        source.pointer = base + index;
        source.eofRead = 0;
    }

    /** Get the message form of this marker: "source(line:offset): ", "source: ", "(line:offset): ", or "". */
    char [] message ()
    {
        if (filename !== null)
        {
            if (line)
                return fmt ("%.*s(%d:%d): ", filename, line, offset);
            return fmt ("%.*s: ", filename);
        }
        if (line)
            return fmt ("(%d:%d): ", line, offset);
        return "";
    }

    /** Get the message form without the trailing colon and space. */
    char [] name ()
    {
        if (filename !== null)
        {
            if (line)
                return fmt ("%.*s(%d:%d)", filename, line, offset);
            return fmt ("%.*s", filename);
        }
        if (line)
            return fmt ("(%d:%d)", line, offset);
        return "";
    }
}

/** A lexing source file.
  * @TODO Make this Lexer.Source once Walter has fixed forward referencing.
  */
class LexerSource
{
    Marker mark; /**< The current marker. */
    char [] string; /**< Input string. */
    char *pointer; /**< Current text pointer. */
    char *end; /**< After the end of the file. */
    char *start; /**< Start of the string. */
    int eofRead; /**< Number of characters read past EOF. */

    /** Assign the input string, handling the BOM.
      * The data is passed through Encoding.kaboom to pick an encoding and then
      * converted to UTF-8; the read pointer starts at the first character.
      */
    this (Lexer lexer, char [] filename, ubyte [] data)
    {
        this.mark.filename = filename;
        this.mark.lexer = lexer;
        this.mark.source = this;

        Encoding encoding = Encoding.kaboom (data);
        this.string = (char []) encoding.convert (data, Encoding.utf8);
        pointer = string;
        end = pointer + string.length;
        start = pointer;
    }

    /** Read the file. */
    this (Lexer lexer, char [] filename)
    {
        this (lexer, filename, (ubyte []) file.read (filename));
    }

    /** Return the coming character without consuming it or \0 if there is none. */
    final dchar peekch ()
    {
        if (pointer < end)
            return *pointer;
        return \0;
    }

    /** Return the next character and consume it or \0 at the end.
      * A "\r\n" pair is consumed as a single line break and reported as "\r".
      * Reading at the end of input only increments eofRead.
      */
    final dchar nextch ()
    {
        if (pointer < end)
        {
            dchar ch = *pointer ++;

            mark.index ++;
            mark.offset ++;
            mark.endIndex ++;
            mark.endOffset ++;

            if (ch == "\n" || ch == "\r")
            {
                mark.line ++;
                mark.offset = 1;
                mark.endLine ++;
                mark.endOffset = 1;
                /* Swallow the "\n" of a "\r\n" pair so it counts as one line break. */
                if (ch == "\r" && pointer < end && *pointer == "\n")
                {
                    mark.index ++;
                    mark.endIndex ++;
                    pointer ++;
                }
            }

            return ch;
        }
        else
            eofRead ++;

        return \0;
    }

    /** Reverse a character that has been read.
      * A read past the end of input is undone by decrementing eofRead.  When a
      * line break is un-read, the line number is decremented and the offset is
      * recomputed by scanning back to the previous line break or the start.
      */
    final void ungetch ()
    {
        /* Undo past-EOF reads first; testing the start-of-input condition
           first would stop an empty source from ever undoing them. */
        if (eofRead)
        {
            eofRead --;
            return;
        }

        if (mark.index == 0)
            return;

        dchar ch = *-- pointer;

        mark.index --;
        mark.offset --;
        mark.endIndex --;
        mark.endOffset --;

        if (ch == "\n" || ch == "\r")
        {
            mark.line --;
            mark.offset = 1;

            char *p = pointer;

            /* Step over the "\r" of a "\r\n" pair, but only look behind the
               newline when a character actually exists there. */
            if (ch == "\n" && p > start && *(p - 1) == "\r")
            {
                p --;
                mark.index --;
                pointer = p;
            }

            /* Count the characters on the line we moved back onto. */
            while (p > start)
                if ((ch = *-- p) == "\r" || ch == "\n")
                    break;
                else
                    mark.offset ++;

            mark.endIndex = mark.index;
            mark.endLine = mark.line;
            mark.endOffset = mark.offset;
        }
    }
}

/** Produces a series of tokens from an input std.string. */
class Lexer
{
    import net.BurtonRadons.parse.error;
    
    alias int TokenType;

    /** The forms a token can take.
      * The members are anonymous enum constants of type TokenType; the order
      * of declaration determines their numeric values.
      */
    enum : TokenType
    {
        reservedToken, /**< Never generated. */

        eofToken, /**< End-of-file marker. */
        eoiToken, /**< End-of-input marker. */
        idToken,  /**< Identifier; string holds the name. */
        lstringToken, /**< Single-comment, multiple comment, or bared string literal; string holds the value. */

        commaToken, /**< "," */
        semicolonToken, /**< ";" */
        lparenToken, /**< "(" */
        rparenToken, /**< ")" */
        lbracketToken, /**< "[" */
        rbracketToken, /**< "]" */
        lcurlyToken, /**< "{" */
        rcurlyToken, /**< "}" */
        dotToken, /**< "." */
        colonToken, /**< ":" */
        dotDotToken, /**< ".." */
        dotDotDotToken, /**< "..." */
        questionToken, /**< "?" */

        assignToken, /**< "=" */
        equalsToken, /**< "==" */
        isToken, /**< "===" */
        notToken, /**< "!" */
        notEqualsToken, /**< "!=" */
        isNotToken, /**< "!==" */

        ueToken, /**< "!<>" */
        unordToken, /**< "!<>=" */
        ulToken, /**< "!<" */
        uleToken, /**< "!<=" */
        ugToken, /**< "!>" */
        ugeToken, /**< "!>=" */

        addToken, /**< "+" */
        addAddToken, /**< "++" */
        addAssToken, /**< "+=" */
        subToken, /**< "-" */
        subSubToken, /**< "--" */
        subAssToken, /**< "-=" */
        mulToken, /**< "*" */
        mulAssToken, /**< "*=" */
        divToken, /**< "/" */
        divAssToken, /**< "/=" */
        modToken, /**< "%" */
        modAssToken, /**< "%=" */
        xorToken, /**< "^" */
        xorAssToken, /**< "^=" */
        andToken, /**< "&" */
        andAndToken, /**< "&&" */
        andAssToken, /**< "&=" */
        orToken,  /**< "|" */
        orOrToken, /**< "||" */
        orAssToken, /**< "|=" */
        catToken, /**< "~" */
        catAssToken, /**< "~=" */

        lessToken, /**< "<" */
        lequalsToken, /**< "<=" */
        lshiftToken, /**< "<<" */
        lshiftAssToken, /**< "<<=" */

        greaterToken, /**< ">" */
        gequalsToken, /**< ">=" */
        rshiftToken, /**< ">>" */
        rshiftAssToken, /**< ">>=" */
        urshiftToken, /**< ">>>" */
        urshiftAssToken, /**< ">>>=" */

        lintToken, /**< Integer literal. */
        llongToken, /**< Integer literal with a "l" appended. */
        lulongToken, /**< Integer literal with a "ul" appended. */
        luintToken, /**< Integer literal with a "u" appended. */
        lrealToken, /**< Floating-point literal. */
        limaginaryToken, /**< Imaginary literal. */

        multilineCommentToken, /**< A multiple-line comment, with all content held in string. */
        lineCommentToken, /**< A single-line comment, with the body held in string. */
        nestedCommentToken, /**< A nesting multiple-line comment, with the body held in string. */

        abstractToken, /**< "abstract" */
        aliasToken, /**< "alias" */
        assertToken, /**< "assert" */
        bitToken, /**< "bit" */
        bodyToken, /**< "body" */
        byteToken, /**< "byte" */
        caseToken, /**< "case" */
        castToken, /**< "cast" */
        catchToken, /**< "catch" */
        centToken, /**< "cent" */
        charToken, /**< "char" */
        classToken, /**< "class" */
        complexToken, /**< "complex" */
        constToken, /**< "const" */
        defaultToken, /**< "default" */
        delegateToken, /**< "delegate" */
        deleteToken, /**< "delete" */
        doToken, /**< "do" */
        doubleToken, /**< "double" */
        elseToken, /**< "else" */
        enumToken, /**< "enum" */
        externToken, /**< "extern" */
        falseToken, /**< "false" */
        finalToken, /**< "final" */
        finallyToken, /**< "finally" */
        floatToken, /**< "float" */
        forToken, /**< "for" */
        ifToken, /**< "if" */
        imaginaryToken, /**< "imaginary" */
        importToken, /**< "import" */
        inToken, /**< "in" */
        inoutToken, /**< "inout" */
        instanceToken, /**< "instance" */
        intToken, /**< "int" */
        longToken, /**< "long" */
        moduleToken, /**< "module" */
        newToken, /**< "new" */
        nullToken, /**< "null" */
        outToken, /**< "out" */
        overrideToken, /**< "override" */
        realToken, /**< "real" */
        returnToken, /**< "return" */
        shortToken, /**< "short" */
        staticToken, /**< "static" */
        structToken, /**< "struct" */
        superToken, /**< "super" */
        switchToken, /**< "switch" */
        synchronizedToken, /**< "synchronized" */
        templateToken, /**< "template" */
        thisToken, /**< "this" */
        throwToken, /**< "throw" */
        tryToken, /**< "try" */
        trueToken, /**< "true" */
        typedefToken, /**< "typedef" */
        ubyteToken, /**< "ubyte" */
        ucentToken, /**< "ucent" */
        uintToken, /**< "uint" */
        ulongToken, /**< "ulong" */
        unittestToken, /**< "unittest" */
        ushortToken, /**< "ushort" */
        voidToken, /**< "void" */
        volatileToken, /**< "volatile" */
        wcharToken, /**< "wchar" */
        whileToken, /**< "while" */
        withToken, /**< "with" */
    }

    static TokenType [char []] reservedIds; /**< Reserved identifiers to token type, setup in the static constructor. */

    /** Fill reservedIds with one entry per reserved word, keyed by its spelling. */
    static this ()
    {
        reservedIds ["abstract"] = abstractToken;
        reservedIds ["alias"] = aliasToken;
        reservedIds ["assert"] = assertToken;
        reservedIds ["bit"] = bitToken;
        reservedIds ["body"] = bodyToken;
        reservedIds ["byte"] = byteToken;
        reservedIds ["case"] = caseToken;
        reservedIds ["cast"] = castToken;
        reservedIds ["catch"] = catchToken;
        reservedIds ["cent"] = centToken;
        reservedIds ["char"] = charToken;
        reservedIds ["class"] = classToken;
        reservedIds ["complex"] = complexToken;
        reservedIds ["const"] = constToken;
        reservedIds ["default"] = defaultToken;
        reservedIds ["delegate"] = delegateToken;
        reservedIds ["delete"] = deleteToken;
        reservedIds ["do"] = doToken;
        reservedIds ["double"] = doubleToken;
        reservedIds ["else"] = elseToken;
        reservedIds ["enum"] = enumToken;
        reservedIds ["extern"] = externToken;
        reservedIds ["false"] = falseToken;
        reservedIds ["final"] = finalToken;
        reservedIds ["finally"] = finallyToken;
        reservedIds ["float"] = floatToken;
        reservedIds ["for"] = forToken;
        reservedIds ["if"] = ifToken;
        reservedIds ["imaginary"] = imaginaryToken;
        reservedIds ["import"] = importToken;
        reservedIds ["in"] = inToken;
        reservedIds ["inout"] = inoutToken;
        reservedIds ["instance"] = instanceToken;
        reservedIds ["int"] = intToken;
        reservedIds ["long"] = longToken;
        reservedIds ["module"] = moduleToken;
        reservedIds ["new"] = newToken;
        reservedIds ["null"] = nullToken;
        reservedIds ["out"] = outToken;
        reservedIds ["override"] = overrideToken;
        reservedIds ["real"] = realToken;
        reservedIds ["return"] = returnToken;
        reservedIds ["short"] = shortToken;
        reservedIds ["static"] = staticToken;
        reservedIds ["struct"] = structToken;
        reservedIds ["super"] = superToken;
        reservedIds ["switch"] = switchToken;
        reservedIds ["synchronized"] = synchronizedToken;
        reservedIds ["template"] = templateToken;
        reservedIds ["this"] = thisToken;
        reservedIds ["throw"] = throwToken;
        reservedIds ["try"] = tryToken;
        reservedIds ["true"] = trueToken;
        reservedIds ["typedef"] = typedefToken;
        reservedIds ["ubyte"] = ubyteToken;
        reservedIds ["ucent"] = ucentToken;
        reservedIds ["uint"] = uintToken;
        reservedIds ["ulong"] = ulongToken;
        reservedIds ["unittest"] = unittestToken;
        reservedIds ["ushort"] = ushortToken;
        reservedIds ["void"] = voidToken;
        reservedIds ["volatile"] = volatileToken;
        reservedIds ["wchar"] = wcharToken;
        reservedIds ["while"] = whileToken;
        reservedIds ["with"] = withToken;
    }

    /** Get the descriptive name of a token type; forwards to Token.typeNameBase. */
    static char [] tokenTypeName (TokenType type)
    {
        char [] described = Token.typeNameBase (type);

        return described;
    }

    /** Return the source spelling of a token type, or a "<<...>>" placeholder
      * for types that have no fixed spelling (identifiers, literals, comments,
      * end markers).
      */
    static char [] tokenTypeRepr (TokenType type)
    {
        switch (type)
        {
            case reservedToken: return "<<reserved>>";
            case eofToken: return "<<end-of-file>>";
            case eoiToken: return "<<end-of-input>>";
            case idToken: return "<<identifier>>";
            case lstringToken: return "<<string-literal>>";
            case commaToken: return ",";
            case semicolonToken: return ";";
            case lparenToken: return "(";
            case rparenToken: return ")";
            case lbracketToken: return "[";
            case rbracketToken: return "]";
            case lcurlyToken: return "{";
            case rcurlyToken: return "}";

            case dotToken: return ".";
            case colonToken: return ":";
            case dotDotToken: return "..";
            case dotDotDotToken: return "...";
            case questionToken: return "?";

            case assignToken: return "=";
            case equalsToken: return "==";
            case isToken: return "===";
            case notToken: return "!";
            case notEqualsToken: return "!=";
            case isNotToken: return "!==";

            case ueToken: return "!<>";
            case unordToken: return "!<>=";
            case ulToken: return "!<";
            case uleToken: return "!<=";
            case ugToken: return "!>";
            case ugeToken: return "!>=";

            case addToken: return "+";
            case addAddToken: return "++";
            case addAssToken: return "+=";
            case subToken: return "-";
            case subSubToken: return "--";
            case subAssToken: return "-=";
            case mulToken: return "*";
            case mulAssToken: return "*=";
            case divToken: return "/";
            case divAssToken: return "/=";
            case modToken: return "%";
            case modAssToken: return "%=";
            case xorToken: return "^";
            case xorAssToken: return "^=";
            case andToken: return "&";
            case andAndToken: return "&&";
            case andAssToken: return "&=";
            case orToken: return "|";
            case orOrToken: return "||";
            case orAssToken: return "|=";
            case catToken: return "~";
            /* Concatenate-and-assign is spelled "~=", not "=" (that is assignToken). */
            case catAssToken: return "~=";

            case lessToken: return "<";
            case lequalsToken: return "<=";
            case lshiftToken: return "<<";
            case lshiftAssToken: return "<<=";

            case greaterToken: return ">";
            case gequalsToken: return ">=";
            case rshiftToken: return ">>";
            case rshiftAssToken: return ">>=";
            case urshiftToken: return ">>>";
            case urshiftAssToken: return ">>>=";

            case lintToken: return '<<int-literal>>';
            case llongToken: return '<<long-literal>>';
            case lulongToken: return '<<ulong-literal>>';
            case luintToken: return '<<uint-literal>>';
            case lrealToken: return '<<float-literal>>';
            case limaginaryToken: return '<<imaginary-literal>>';

            case multilineCommentToken: return "<<multiline-comment>>";
            case lineCommentToken: return "<<line-comment>>";
            case nestedCommentToken: return "<<nested-comment>>";

            case abstractToken: return 'abstract';
            case aliasToken: return 'alias';
            case assertToken: return 'assert';
            case bitToken: return 'bit';
            case bodyToken: return 'body';
            case byteToken: return 'byte';
            case caseToken: return 'case';
            case castToken: return 'cast';
            case catchToken: return 'catch';
            case centToken: return 'cent';
            case charToken: return 'char';
            case classToken: return 'class';
            case complexToken: return 'complex';
            case constToken: return 'const';
            case defaultToken: return 'default';
            case delegateToken: return 'delegate';
            case deleteToken: return 'delete';
            case doToken: return 'do';
            case doubleToken: return 'double';
            case elseToken: return 'else';
            case enumToken: return 'enum';
            case realToken: return 'real';
            case externToken: return 'extern';
            case falseToken: return 'false';
            case finalToken: return 'final';
            case finallyToken: return 'finally';
            case floatToken: return 'float';
            case forToken: return 'for';
            case ifToken: return 'if';
            case imaginaryToken: return 'imaginary';
            case importToken: return 'import';
            case inToken: return 'in';
            case inoutToken: return 'inout';
            case instanceToken: return 'instance';
            case intToken: return 'int';
            case longToken: return 'long';
            case moduleToken: return 'module';
            case newToken: return 'new';
            case nullToken: return 'null';
            case outToken: return 'out';
            case overrideToken: return 'override';
            case returnToken: return 'return';
            case shortToken: return 'short';
            case staticToken: return 'static';
            case structToken: return 'struct';
            case superToken: return 'super';
            case switchToken: return 'switch';
            case synchronizedToken: return 'synchronized';
            case templateToken: return 'template';
            case thisToken: return 'this';
            case throwToken: return 'throw';
            case tryToken: return 'try';
            case trueToken: return 'true';
            case typedefToken: return 'typedef';
            case ubyteToken: return 'ubyte';
            case ucentToken: return 'ucent';
            case uintToken: return 'uint';
            case ulongToken: return 'ulong';
            case unittestToken: return 'unittest';
            case ushortToken: return 'ushort';
            case voidToken: return 'void';
            case volatileToken: return 'volatile';
            case wcharToken: return 'wchar';
            case whileToken: return 'while';
            case withToken: return 'with';
        }
    }

    /** Look an identifier up in reservedIds: a reserved word gives its token type, anything else gives idToken. */
    TokenType idReserved (char [] string)
    {
        return (string in reservedIds) ? reservedIds [string] : idToken;
    }

    /** A single token object. */
    class Token
    {
        Token next; /**< The next token after we did some forward scanning. */
        TokenType type; /**< The type of the token. */
        Marker mark; /**< Mark at the point of the token. */

        //union TODO
        //{
            char [] string; /**< The string body in UTF-8. */
            ulong integer; /**< Value when an integer literal (lintToken, llongToken, lulongToken). */
            real floating; /**< Value when a floating-point literal or complex. */
        //}

        /** Empty constructor. */
        this ()
        {
        }

        /** Return the token after this one.
          * The result is cached in next, so the lookahead is lexed only once.
          * An end-of-input token returns itself.  The lexer's current token is
          * set to null while nextToken runs and restored afterwards.
          */
        Token peek ()
        {
            Token save;

            if (next !== null)
                return next;
            if (type == eoiToken)
                return this;
            save = mark.lexer.token;
            mark.lexer.token = null;
            next = mark.lexer.nextToken ();
            mark.lexer.token = save;
            return next;
        }

        /** Return the string representation of the token: a placeholder for
          * the end markers, otherwise the marked source text.
          */
        char [] toString ()
        {
            switch (type)
            {
                case eofToken: return "<<end-of-file>>";
                case eoiToken: return "<<end-of-input>>";
                default: return mark.string ();
            }
        }

        /** Get the token but quoted appropriately.
          * End markers are described in words, string literals return their
          * string field as-is, and everything else is the marked source text
          * wrapped in double quotes.
          */
        char [] quoted ()
        {
            switch (type)
            {
                case eofToken: return "end of file";
                case eoiToken: return "end of input";
                case lstringToken: return string;
                default: return '"' ~ mark.string () ~ '"';
            }
        }

        /** Print the string representation and a newline. */
        void print ()
        {
            printf ("%.*s\n", toString ());
        }

        /** Return whether this token is of one of the basic types. */
        bit isBasicType ()
        {
            switch (type)
            {
                case voidToken:
                case wcharToken:
                case bitToken:
                case charToken:
                case imaginaryToken:
                case complexToken:
                case byteToken:
                case ubyteToken:
                case shortToken:
                case ushortToken:
                case intToken:
                case uintToken:
                case longToken:
                case ulongToken:
                case centToken:
                case ucentToken:
                case floatToken:
                case doubleToken:
                case realToken:
                    return true;
                default:
                    return false;
            }
        }

        /** Get the name of the token's type. */
        char [] typeName ()
        {
            return typeNameBase (type);
        }

        /** Get the name of an archetype.
          * Returns a human-readable description of the token type; reserved
          * words are given as their spelling in double quotes.  Throws Error
          * for a type with no entry (reservedToken has none).
          */
        static char [] typeNameBase (TokenType type)
        {
            switch (type)
            {
                case eofToken: return "end of file";
                case eoiToken: return "end of input";
                case idToken: return "identifier";
                case lstringToken: return "string";
                case commaToken: return 'comma (",")';
                case semicolonToken: return 'semicolon (";")';
                case lparenToken: return 'left parenthesis ("(")';
                case rparenToken: return 'right parenthesis (")")';
                case lbracketToken: return 'left bracket ("[")';
                case rbracketToken: return 'right bracket ("]")';
                case lcurlyToken: return 'left curly brace ("{")';
                case rcurlyToken: return 'right curly brace ("}")';
                case dotToken: return 'dot (".")';
                case dotDotToken: return 'double-dot ("..")';
                case dotDotDotToken: return 'triple-dot ("...")';
                case questionToken: return 'question mark ("?")';
                case colonToken: return 'colon (":")';
                case assignToken: return 'assignment ("=")';
                case equalsToken: return 'equals ("==")';
                case isToken: return 'equals identity ("===")';
                case notToken: return 'not ("!")';
                case notEqualsToken: return 'not equal ("!=")';
                case isNotToken: return 'not equal identity ("!==")';
                case ueToken: return 'unordered or equal ("!<>")';
                case unordToken: return 'unordered ("!<>=")';
                case ulToken: return 'unordered or less ("!<")';
                case uleToken: return 'unordered, less, or equal ("!<=")';
                case ugToken: return 'unordered or greater ("!>")';
                case ugeToken: return 'unordered, greater, or equal ("!>=")';
                case addToken: return 'add ("+")';
                case addAddToken: return 'increment ("++")';
                case addAssToken: return 'add and assign ("+=")';
                case subToken: return 'subtract ("-")';
                case subSubToken: return 'decrement ("--")';
                case subAssToken: return 'subtract and assign ("-=")';
                case mulToken: return 'multiply ("*")';
                case mulAssToken: return 'multiply and assign ("*=")';
                case divToken: return 'divide ("/")';
                case divAssToken: return 'divide and assign ("/=")';
                case modToken: return 'division remainder ("%")';
                case modAssToken: return 'remainder and assign ("%=")';
                case xorToken: return 'bitwise exclusive OR ("^")';
                /* "^=" also assigns; the description used to repeat the plain "^" wording. */
                case xorAssToken: return 'bitwise exclusive OR and assign ("^=")';
                case andToken: return 'bitwise AND ("&")';
                case andAndToken: return 'logical AND ("&&")';
                case andAssToken: return 'bitwise AND and assign ("&=")';
                case orToken: return 'bitwise OR ("|")';
                case orOrToken: return 'logical OR ("||")';
                case orAssToken: return 'bitwise OR and assign ("|=")';
                case catToken: return 'concatenate ("~")';
                case catAssToken: return 'concatenate and assign ("~=")';
                case lessToken: return 'less than ("<")';
                case lequalsToken: return 'less than or equal ("<=")';
                case lshiftToken: return 'left shift ("<<")';
                case lshiftAssToken: return 'left shift and assign ("<<=")';
                case greaterToken: return 'greater than (">")';
                /* ">=" is a comparison, not an assignment. */
                case gequalsToken: return 'greater than or equal (">=")';
                case rshiftToken: return 'right shift (">>")';
                case rshiftAssToken: return 'right shift and assign (">>=")';
                case urshiftToken: return 'unsigned right shift (">>>")';
                case urshiftAssToken: return 'unsigned right shift and assign (">>>=")';
                case lintToken: return 'integer literal';
                case llongToken: return 'long literal';
                case lulongToken: return 'unsigned long literal';
                case luintToken: return 'unsigned integer literal';
                case lrealToken: return 'floating-point literal';
                case limaginaryToken: return 'imaginary literal';
                case multilineCommentToken: return 'multiple-line comment';
                case lineCommentToken: return 'line comment';
                case nestedCommentToken: return 'nested comment';
                case abstractToken: return '"abstract"';
                case aliasToken: return '"alias"';
                case assertToken: return '"assert"';
                case caseToken: return '"case"';
                case castToken: return '"cast"';
                case catchToken: return '"catch"';
                case charToken: return '"char"';
                case doToken: return '"do"';
                case elseToken: return '"else"';
                case enumToken: return '"enum"';
                case externToken: return '"extern"';
                case falseToken: return '"false"';
                case finalToken: return '"final"';
                case finallyToken: return '"finally"';
                case ifToken: return '"if"';
                case importToken: return '"import"';
                case inToken: return '"in"';
                case inoutToken: return '"inout"';
                case instanceToken: return '"instance"';
                case moduleToken: return '"module"';
                /* Quoted like every other reserved word. */
                case newToken: return '"new"';
                case nullToken: return '"null"';
                case returnToken: return '"return"';
                case staticToken: return '"static"';
                case structToken: return '"struct"';
                case superToken: return '"super"';
                case switchToken: return '"switch"';
                case synchronizedToken: return '"synchronized"';
                case templateToken: return '"template"';
                case thisToken: return '"this"';
                case throwToken: return '"throw"';
                case tryToken: return '"try"';
                case trueToken: return '"true"';
                case typedefToken: return '"typedef"';
                case whileToken: return '"while"';
                case withToken: return '"with"';
                case voidToken: return '"void"';
                case wcharToken: return '"wchar"';
                case bitToken: return '"bit"';
                case imaginaryToken: return '"imaginary"';
                case classToken: return '"class"';
                case complexToken: return '"complex"';
                case constToken: return '"const"';
                case defaultToken: return '"default"';
                case byteToken: return '"byte"';
                case ubyteToken: return '"ubyte"';
                case shortToken: return '"short"';
                case ushortToken: return '"ushort"';
                case intToken: return '"int"';
                case uintToken: return '"uint"';
                case longToken: return '"long"';
                case ulongToken: return '"ulong"';
                case unittestToken: return '"unittest"';
                case centToken: return '"cent"';
                case ucentToken: return '"ucent"';
                case floatToken: return '"float"';
                case forToken: return '"for"';
                case doubleToken: return '"double"';
                case realToken: return '"real"';
                case delegateToken: return '"delegate"';
                case deleteToken: return '"delete"';
                case outToken: return '"out"';
                case overrideToken: return '"override"';
                case bodyToken: return '"body"';
                case volatileToken: return '"volatile"';
                default:
                    throw new Error (fmt ("Unhandled token type %d", type));
            }
        }
    }

    LexerSource [] sources; /**< Stack of open source files; open() appends to it and close() shrinks it. */
    LexerSource source; /**< Source that peekch(), nextch() and ungetch() read from; null when no source is open. */
    bit eofWanted = false; /**< Whether nextToken() returns eofToken tokens instead of skipping them. */
    bit commentWanted = false; /**< Whether nextToken() returns comment tokens instead of skipping them. */
    int lexerErrorCount = 0; /**< Number of errors reported through lexerError(). */
    int errorCount = 0; /**< Number of errors reported through error(); every lexerError() call is counted here too. */

    /**
     * Build a lexer over an in-memory buffer.
     *
     * Allocates the token object, opens (filename, source) as the first source
     * and reads the first token so that `token` is valid on return.
     */
    this (char [] filename, ubyte [] source)
    {
        this.token = new Token ();
        this.open (filename, source);
        this.nextToken ();
    }

    /**
     * Build a lexer over a named source file.
     *
     * Allocates the token object, opens filename as the first source and reads
     * the first token so that `token` is valid on return.
     */
    this (char [] filename)
    {
        this.token = new Token ();
        this.open (filename);
        this.nextToken ();
    }

    /** Record an error: bump errorCount and print the error's text followed by a newline. */
    void error (MarkedError code)
    {
        ++ errorCount;

        char [] message = code.toString ();
        printf ("%.*s\n", message);
    }

    /** Record an error raised by the lexer itself: bump lexerErrorCount, then report it through error(). */
    void lexerError (MarkedError code)
    {
        ++ lexerErrorCount;
        error (code);
    }

    /** Create a LexerSource for filename, push it on the source stack and make it the current source. */
    void open (char [] filename)
    {
        LexerSource opened = new LexerSource (this, filename);

        sources ~= opened;
        source = opened;
    }

    /** Create a LexerSource for the given name and buffer, push it on the source stack and make it the current source. */
    void open (char [] filename, ubyte [] string)
    {
        LexerSource opened = new LexerSource (this, filename, string);

        sources ~= opened;
        source = opened;
    }

    /**
     * Close the current source file and resume reading from the one below it
     * on the stack.
     *
     * When the closed source was the only one, both `source` and `sources`
     * become null.
     */
    void close ()
    {
        if (sources.length > 1)
        {
            // Pop first, then pick the new top of the stack.  Selecting the top
            // before popping would leave `source` pointing at the source that
            // has just been closed.
            sources = sources [0 .. sources.length - 1];
            source = sources [sources.length - 1];
        }
        else
        {
            source = null;
            sources = null;
        }
    }

    /** Look at the upcoming character of the current source without consuming it; \0 when no source is open. */
    dchar peekch ()
    {
        if (source === null)
            return \0;
        return source.peekch ();
    }

    /** Consume and return the next character of the current source; \0 when no source is open. */
    dchar nextch ()
    {
        if (source === null)
            return \0;
        return source.nextch ();
    }

    /** Discard one character, then return the character that follows it without consuming that one. */
    dchar nextAndPeekch ()
    {
        nextch ();

        dchar upcoming = peekch ();
        return upcoming;
    }

    /** Push the last character read back onto the current source.  Does nothing when no source is open. */
    void ungetch ()
    {
        if (source === null)
            return;
        source.ungetch ();
    }

    /** Return nonzero when ch may begin an identifier: an underscore or a character accepted by isalpha. */
    char isIdStart (char ch)
    {
        return ch == "_" || isalpha (ch);
    }

    /** Return nonzero when ch may continue an identifier: an underscore or a character accepted by isalnum. */
    char isIdMiddle (char ch)
    {
        return ch == "_" || isalnum (ch);
    }

    Token token; /**< Current token; createToken() hands this same object back to be refilled when it is not null. */

    /** Return the token object to fill in: the current `token` when there is one, otherwise a freshly allocated Token. */
    Token createToken ()
    {
        if (token !== null)
            return token;
        return new Token ();
    }

    /** Obtain a token from createToken() and set its marker and type. */
    Token createToken (Marker mark, TokenType type)
    {
        Token result = createToken ();

        result.type = type;
        result.mark = mark;
        return result;
    }

    /** Obtain a token from createToken(), set its marker and type, and attach its string value. */
    Token createToken (Marker mark, TokenType type, char [] string)
    {
        Token result = createToken (mark, type);

        result.string = string;
        return result;
    }

    /**
     * Advance to the next token the caller is interested in and return it.
     *
     * A token already linked through `token.next` is used first; otherwise one
     * is lexed with nextTokenBase().  eofToken is skipped unless eofWanted is
     * set, and the three comment token types are skipped unless commentWanted
     * is set.
     */
    Token nextToken ()
    {
        bit skip;

        do
        {
            if (token !== null && token.next !== null)
                token = token.next;
            else
                token = nextTokenBase ();

            if (token.type == eofToken)
                skip = !eofWanted;
            else if (token.type == multilineCommentToken
                  || token.type == lineCommentToken
                  || token.type == nestedCommentToken)
                skip = !commentWanted;
            else
                skip = false;
        }
        while (skip);

        return token;
    }

    /**
     * Lex and return the next raw token from the current source.
     *
     * No filtering happens here: comments and end-of-file are returned as
     * tokens and it is nextToken() that decides whether to skip them.
     *
     * Returns an eoiToken when no source is open.  When the current source is
     * exhausted (character 0 or 0x1A) the source is closed and an eofToken is
     * returned.  Unterminated comments are reported through lexerError() and
     * still returned as comment tokens.  An unterminated string is reported
     * and lexing then continues with whatever follows.
     *
     * Throws Error for a character that cannot start any token, after
     * reporting an InvalidCharacterError.
     */
    Token nextTokenBase ()
    {
        Marker bod;

        if (source === null)
            return createToken (bod, eoiToken);
        while (1)
        {
            // Remember where the token starts; whitespace restarts the loop so
            // the marker always sits on the first significant character.
            bod = source.mark;
            dchar ch = nextch ();

            if (isIdStart (ch))
            {
                while (isIdMiddle (peekch ()))
                    nextch ();
                bod = bod.contain (source.mark);
                char [] string = bod.string ();
                // idReserved decides between a keyword type and a plain identifier.
                TokenType type = idReserved (string);

                return createToken (bod, type, string);
            }

            if (isspace (ch))
                continue;

            if (isdigit (ch))
                return nextNumber (bod, ch);

            switch (ch)
            {
                case 0x001A:
                case \0: 
                    close ();
                    return createToken (bod, eofToken);
                case ",": return createToken (bod.contain (source.mark), commaToken);
                case ";": return createToken (bod.contain (source.mark), semicolonToken);
                case "(": return createToken (bod.contain (source.mark), lparenToken);
                case ")": return createToken (bod.contain (source.mark), rparenToken);
                case "[": return createToken (bod.contain (source.mark), lbracketToken);
                case "]": return createToken (bod.contain (source.mark), rbracketToken);
                case "{": return createToken (bod.contain (source.mark), lcurlyToken);
                case "}": return createToken (bod.contain (source.mark), rcurlyToken);
                case ":": return createToken (bod.contain (source.mark), colonToken);
                case "?": return createToken (bod.contain (source.mark), questionToken);

                case ".":
                    if (peekch () == ".") // ..., ..
                    {
                        nextch ();
                        if (peekch () == ".") // ...
                        {
                            nextch ();
                            return createToken (bod.contain (source.mark), dotDotDotToken);
                        }
                        else // ..
                            return createToken (bod.contain (source.mark), dotDotToken);
                    }
                    else // .
                        return createToken (bod.contain (source.mark), dotToken);

                case ">": // >, >=, >>, >>=, >>>, >>>=
                    if ((ch = nextch ()) == ">") // >>, >>=, >>>, >>>=
                    {
                        if ((ch = nextch ()) == ">") // >>>, >>>=
                        {
                            if (peekch () == "=") // >>>=
                            {
                                nextch ();
                                return createToken (bod.contain (source.mark), urshiftAssToken);
                            }
                            else // >>>
                                return createToken (bod.contain (source.mark), urshiftToken);
                        }
                        else if (ch == "=") // >>=
                            return createToken (bod.contain (source.mark), rshiftAssToken);
                        else // >>
                        {
                            ungetch ();
                            return createToken (bod.contain (source.mark), rshiftToken);
                        }
                    }
                    else if (ch == "=") // >=
                        return createToken (bod.contain (source.mark), gequalsToken);
                    else // >
                    {
                        ungetch ();
                        return createToken (bod.contain (source.mark), greaterToken);
                    }

                case "~": // ~, ~=
                    if (peekch () == "=") // ~=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), catAssToken);
                    }
                    else // ~
                        return createToken (bod.contain (source.mark), catToken);

                case "+": // +, ++, +=
                    if (peekch () == "+") // ++
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), addAddToken);
                    }
                    else if (peekch () == "=") // +=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), addAssToken);
                    }
                    else // +
                        return createToken (bod.contain (source.mark), addToken);

                case "-": // -, --, -=
                    if ((ch = nextch ()) == "-") // --
                        return createToken (bod.contain (source.mark), subSubToken);
                    else if (ch == "=") // -=
                        return createToken (bod.contain (source.mark), subAssToken);
                    else // -
                    {
                        ungetch ();
                        return createToken (bod.contain (source.mark), subToken);
                    }

                case "=": // =, ==, ===
                    if (peekch () == "=") // ==, ===
                    {
                        if (nextAndPeekch () == "=") // ===
                        {
                            nextch ();
                            return createToken (bod.contain (source.mark), isToken);
                        }
                        else // ==
                            return createToken (bod.contain (source.mark), equalsToken);
                    }
                    else // =
                        return createToken (bod.contain (source.mark), assignToken);

                case "&": // &, &&, &=
                    if (peekch () == "=") // &=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), andAssToken);
                    }
                    else if (peekch () == "&") // &&
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), andAndToken);
                    }
                    else // &
                        return createToken (bod.contain (source.mark), andToken);

                case "|": // |, ||, |=
                    if (peekch () == "=") // |=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), orAssToken);
                    }
                    else if (peekch () == "|") // ||
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), orOrToken);
                    }
                    else // |
                        return createToken (bod.contain (source.mark), orToken);

                case "/": // /, /=, /* ... */, // ... \n, /+ ... +/
                    ch = nextch ();
                    if (ch == "=") // /=
                        return createToken (bod.contain (source.mark), divAssToken);
                    else if (ch == "*") // /* ... */
                    {
                        while (1)
                        {
                            if ((ch = nextch ()) == 0)
                            {
                                lexerError (new UnterminatedMultilineCommentError (bod.contain (source.mark)));
                                return createToken (bod.contain (source.mark), multilineCommentToken);
                            }

                            // Peek instead of consuming: in "**/" the second
                            // '*' must still be able to pair with the '/'.
                            if (ch == "*" && peekch () == "/")
                            {
                                nextch ();
                                return createToken (bod.contain (source.mark), multilineCommentToken);
                            }
                        }
                    }
                    else if (ch == "/") // // ... \n
                    {
                        while (1)
                        {
                            if ((ch = nextch ()) == 0 || ch == "\n" || ch == "\r")
                                return createToken (bod.contain (source.mark), lineCommentToken);
                        }
                    }
                    else if (ch == "+") // /+ ... +/
                    {
                        int depth = 1;

                        while (1)
                        {
                            if ((ch = nextch ()) == 0)
                            {
                                lexerError (new UnterminatedNestedCommentError (bod.contain (source.mark), depth));
                                return createToken (bod.contain (source.mark), nestedCommentToken);
                            }
                            // Both delimiters are matched by peeking so that a
                            // non-matching second character (as in "++/" or
                            // "//+") is examined again on the next iteration.
                            else if (ch == "+" && peekch () == "/")
                            {
                                nextch ();
                                depth --;
                                if (depth == 0)
                                    return createToken (bod.contain (source.mark), nestedCommentToken);
                            }
                            else if (ch == "/" && peekch () == "+")
                            {
                                nextch ();
                                depth ++;
                            }
                        }
                    }
                    else // /
                    {
                        ungetch ();
                        return createToken (bod.contain (source.mark), divToken);
                    }

                case "!": // !, !=, !==, !<>, !<>=, !<, !<=, !>, !>=
                    ch = nextch ();
                    if (ch == "=") // !=, !==
                    {
                        if (peekch () == "=") // !==
                        {
                            nextch ();
                            return createToken (bod.contain (source.mark), isNotToken);
                        }
                        else // !=
                            return createToken (bod.contain (source.mark), notEqualsToken);
                    }
                    else if (ch == "<") // !<>, !<>=, !<, !<=
                    {
                        if ((ch = nextch ()) == ">") // !<>, !<>=
                        {
                            if (peekch () == "=") // !<>=
                            {
                                nextch ();
                                return createToken (bod.contain (source.mark), unordToken);
                            }
                            else // !<>
                                return createToken (bod.contain (source.mark), ueToken);
                        }
                        else if (ch == "=") // !<=
                            return createToken (bod.contain (source.mark), uleToken);
                        else // !<
                        {
                            ungetch ();
                            return createToken (bod.contain (source.mark), ulToken);
                        }
                    }
                    else if (ch == ">") // !>, !>=
                    {
                        if (peekch () == "=") // !>=
                        {
                            nextch ();
                            return createToken (bod.contain (source.mark), ugeToken);
                        }
                        else // !>
                            return createToken (bod.contain (source.mark), ugToken);
                    }
                    else // !
                    {
                        ungetch ();
                        return createToken (bod.contain (source.mark), notToken);
                    }

                case "*": // *, *=
                    if (peekch () == "=") // *=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), mulAssToken);
                    }
                    else // *
                        return createToken (bod.contain (source.mark), mulToken);

                case "%": // %, %=
                    if (peekch () == "=") // %=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), modAssToken);
                    }
                    else // %
                        return createToken (bod.contain (source.mark), modToken);

                case "^": // ^, ^=
                    if (peekch () == "=") // ^=
                    {
                        nextch ();
                        return createToken (bod.contain (source.mark), xorAssToken);
                    }
                    else // ^
                        return createToken (bod.contain (source.mark), xorToken);

                case "<": // <, <=, <<, <<=
                    if ((ch = nextch ()) == "<") // <<, <<=
                    {
                        if (peekch () == "=") // <<=
                        {
                            nextch ();
                            return createToken (bod.contain (source.mark), lshiftAssToken);
                        }
                        else // <<
                            return createToken (bod.contain (source.mark), lshiftToken);
                    }
                    else if (ch == "=") // <=
                        return createToken (bod.contain (source.mark), lequalsToken);
                    else // <
                    {
                        ungetch ();
                        return createToken (bod.contain (source.mark), lessToken);
                    }

                case "\"": // "..."
                {
                    dchar [] string;

                    while (1)
                    {
                        if ((ch = nextch ()) == 0)
                        {
                            lexerError (new UnterminatedDoublequoteStringError (bod.contain (source.mark)));
                            break;
                        }
                        else if (ch == "\\")
                            string ~= readEscape (bod);
                        else if (ch == "\"")
                            return createToken (bod.contain (source.mark), lstringToken, (char []) Encoding.utf32.toascii (((ubyte *) string) [0 .. string.length * 4]));
                        else
                            string ~= ch;
                    }
                    // Unterminated: leave the switch and let the outer loop
                    // carry on with the next character.
                    break;
                }

                case "'": // '...' (no escape processing)
                {
                    dchar [] string;

                    while (1)
                    {
                        if ((ch = nextch ()) == 0)
                        {
                            lexerError (new UnterminatedSinglequoteStringError (bod.contain (source.mark)));
                            break;
                        }
                        else if (ch == "'")
                            return createToken (bod.contain (source.mark), lstringToken, (char []) Encoding.utf32.toascii (((ubyte *) string) [0 .. string.length * 4]));
                        else
                            string ~= ch;
                    }
                    // Unterminated: leave the switch and let the outer loop
                    // carry on with the next character.
                    break;
                }

                case "\\": // \ EscapeCode, a one-character string literal
                {
                    dchar [1] string;

                    string [0] = readEscape (bod);
                    return createToken (bod.contain (source.mark), lstringToken, (char []) Encoding.utf32.toascii (((ubyte *) string) [0 .. string.length * 4]));
                }

                default:
                    lexerError (new InvalidCharacterError (bod.contain (source.mark), ch));
                    throw new Error ("Boogaboo\n");
            }
        }

        return null;
    }

    /**
     * Read the body of an escape sequence; the backslash has already been
     * consumed.
     *
     * start is the marker of the enclosing token and is only used to position
     * error reports.  Returns the character the escape denotes.  On any
     * malformed or unknown escape an error is reported through lexerError()
     * and 0 is returned.
     */
    dchar readEscape (Marker start)
    {
        dchar ch = nextch ();

        if (ch == 0)
        {
            lexerError (new EOFAfterEscapeError (start.contain (source.mark)));
            return 0;
        }

        // Characters that stand for themselves.
        if (ch == "'" || ch == '"' || ch == "?" || ch == "\\") return ch;
        if (ch == "a") return \a;
        if (ch == "b") return \b;
        if (ch == "f") return \f;
        if (ch == "n") return \n;
        if (ch == "r") return \r;
        if (ch == "t") return \t;
        if (ch == "v") return \v;

        if (ch == "x") // \ x HexDigit HexDigit
        {
            dchar da, db;

            if (!hexdigitRead (da))
            {
                lexerError (new IllformedEscapeError (start.contain (source.mark), "Ill formed hex escape; expected two hexadecimal digits."));
                return 0;
            }

            if (!hexdigitRead (db))
            {
                lexerError (new IllformedEscapeError (start.contain (source.mark), "Ill formed hex escape; expected a second hexadecimal digit."));
                return 0;
            }

            return da * 16 + db;
        }

        if (ch == "u") // \ u HexDigit HexDigit HexDigit HexDigit
        {
            dchar da, db, dc, dd;

            if (!hexdigitRead (da)
             || !hexdigitRead (db)
             || !hexdigitRead (dc)
             || !hexdigitRead (dd))
            {
                lexerError (new IllformedEscapeError (start.contain (source.mark), "Ill formed UNICODE escape; expected four hexadecimal digits."));
                // Same convention as the hex escape: never build a value from
                // digits that failed to parse.
                return 0;
            }

            return (da << 12) | (db << 8) | (dc << 4) | dd;
        }

        if (isoctal (ch)) // \ OctalDigit [OctalDigit [OctalDigit]]
        {
            dchar da = ch - '0', db, dc;

            if (isoctal (peekch ()))
            {
                db = nextch () - '0';
                if (isoctal (peekch ()))
                {
                    dc = nextch () - '0';
                    return da * 64 + db * 8 + dc;
                }
                else
                    return da * 8 + db;
            }
            else
                return da;
        }

        // Anything else is not an escape this lexer knows.  Report it rather
        // than running off the end of the function without a return value.
        lexerError (new IllformedEscapeError (start.contain (source.mark), "Unknown escape sequence."));
        return 0;
    }
    
    /** Tell whether value is one of the octal digits '0' through '7'. */
    bit isoctal (dchar value)
    {
        if (value < '0')
            return false;
        return value <= '7';
    }

    /**
     * Consume one character and decode it as a hexadecimal digit.
     *
     * On success value receives the digit's numeric value (0 to 15) and true
     * is returned.  On end of input or a non-hex character false is returned;
     * the character has still been consumed.
     */
    bit hexdigitRead (out dchar value)
    {
        dchar ch = nextch ();

        if (ch == 0)
            return false;
        // The digit comes from the character just read.  `value` is an out
        // parameter, so it must not be used as an input here.
        if (ch >= '0' && ch <= '9')
            value = ch - '0';
        else if (ch >= 'a' && ch <= 'f')
            value = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'F')
            value = ch - 'A' + 10;
        else
            return false;
        return true;
    }

    /**
     * Lex a numeric literal whose first digit has already been consumed.
     *
     * bod is the marker taken before that digit and ch is the digit itself.
     * A "0x"/"0X" prefix selects base 16 and "0b"/"0B" selects base 2.
     * Otherwise the literal is scanned ahead for an "h"/"H" suffix, which
     * selects base 16.  Failing that, a leading '0' selects base 8 and any
     * other digit base 10.  The digits are converted by nextNumberBase.
     */
    Token nextNumber (Marker bod, dchar ch)
    {
        dchar pch = peekch ();

        if (ch == '0')
        {
            if (ctype.tolower (pch) == "x")
            {
                // Skip the 'x'; the digits start right after it.
                nextch ();
                return nextNumberBase (bod, 16, false);
            }
            else if (ctype.tolower (pch) == "b")
            {
                // Skip the 'b'; the digits start right after it.
                nextch ();
                return nextNumberBase (bod, 2, false);
            }
        }

        /* First we need to find out whether there's a "h" at the end. */
        // Position just after the first digit, so the look-ahead below can be undone.
        Marker point = source.mark;

        while (1)
        {
            if (ctype.tolower (pch) == "h")
            {
                // Rewind to just after the first digit, then step back over
                // that digit so nextNumberBase reads the literal from its start.
                point.rewind ();
                ungetch ();
                return nextNumberBase (bod, 16, true);
            }

            // Stop scanning at the first character that is not a hex digit.
            if (!isdigit (pch) 
             && !(pch >= 'a' && pch <= 'f') 
             && !(pch >= 'A' && pch <= 'F'))
                break;

            // On the first pass this consumes the character that was only
            // peeked above, so that character is examined twice.
            pch = nextch ();
        }

        /* Didn't find, do it normally. */
        point.rewind ();
        // NOTE(review): a leading '0' always selects base 8 here, and
        // nextNumberBase also reads fractional digits in `base`, so "0.5"
        // looks like it is read as 5/8 - confirm whether that is intended.
        if (ch == '0')
            return nextNumberBase (bod, 8, false);
        // Step back over the first digit so nextNumberBase sees every digit.
        ungetch ();
        return nextNumberBase (bod, 10, false);
    }

    /**
     * Convert ch to its numeric value as a digit of the given base.
     *
     * '0'-'9' map to 0-9 and letters of either case map to 10-35.  Returns -1
     * when ch is not such a character or when its value is not below base.
     */
    final int readDigit (dchar ch, int base)
    {
        int value = -1;

        if (ch >= '0' && ch <= '9')
            value = ch - '0';
        else if (ch >= 'A' && ch <= 'Z')
            value = ch - 'A' + 10;
        else if (ch >= 'a' && ch <= 'z')
            value = ch - 'a' + 10;

        return (value >= 0 && value < base) ? value : -1;
    }

    /**
     * Read the digits and suffixes of a numeric literal and build its token.
     *
     * start is the marker of the literal's first character, base is the radix
     * used for both the integer and the fractional digits, and expecth says
     * whether a trailing "h"/"H" should be consumed.
     *
     * Recognised suffixes are "l", "u", "lu" and "ul" in either case,
     * optionally followed by "i" for an imaginary literal.  The token type is
     * lintToken, luintToken, llongToken, lulongToken, lrealToken or
     * limaginaryToken.  The value is stored in token.floating when a '.' was
     * seen and in token.integer otherwise.
     *
     * Exponents are not handled: an 'e' or 'E' following the digits trips the
     * asserts below.
     */
    Token nextNumberBase (Marker start, int base, bit expecth)
    {
        real fvalue;
        ulong value;
        int digit;
        dchar ch;
        bit isfloat;

        // Integer part.  ch ends up holding the first non-digit, still unconsumed.
        while (1)
        {
            if ((digit = readDigit (ch = peekch (), base)) < 0)
                break;
            value = value * base + digit;
            nextch ();
        }

        if (ch == '.')
        {
            real mul = 0, div = 1;

            nextch ();
            fvalue = value;
            isfloat = true;

            // Fractional part, accumulated as mul / div in the same base.
            while (1)
            {
                if ((digit = readDigit (ch = peekch (), base)) < 0)
                    break;
                mul = mul * base + digit;
                div = div * base;
                nextch ();
            }

            fvalue += mul / div;
        }

        assert (ch != 'e');
        assert (ch != 'E');

        if (expecth && ctype.tolower (ch) == 'h')
            ch = nextAndPeekch ();

        TokenType type = isfloat ? lrealToken : lintToken;

        // After every consumed suffix letter ch is refreshed to the next
        // unconsumed character, so the imaginary check below never looks at a
        // letter that has already been consumed.
        if (ctype.tolower (ch) == 'l')
        {
            ch = nextAndPeekch ();
            type = isfloat ? lrealToken : llongToken;
            if (ctype.tolower (ch) == 'u')
            {
                ch = nextAndPeekch ();
                type = isfloat ? lrealToken : lulongToken;
            }
        }
        else if (ctype.tolower (ch) == 'u')
        {
            ch = nextAndPeekch ();
            type = isfloat ? lrealToken : luintToken;
            if (ctype.tolower (ch) == 'l')
            {
                ch = nextAndPeekch ();
                type = isfloat ? lrealToken : lulongToken;
            }
        }

        if (ctype.tolower (ch) == 'i')
        {
            nextch ();
            type = limaginaryToken;
        }

        Token token = createToken (start.contain (source.mark), type);

        if (isfloat)
            token.floating = fvalue;
        else
            token.integer = value;
        return token;
    }

    /** When the current token has the given type, consume it and return true; otherwise leave it alone and return false. */
    final bit handleToken (TokenType type)
    {
        if (token.type == type)
        {
            nextToken ();
            return true;
        }

        return false;
    }

    /**
     * Check the current token against type with ensureToken(), then consume it
     * whether or not it matched.  Returns whether it matched.
     */
    final bit expectToken (TokenType type)
    {
        bit matched = ensureToken (type);

        nextToken ();
        return matched;
    }

    /**
     * Check that the current token has the given type without consuming it.
     * On a mismatch an ExpectTokenError naming the expected and actual token
     * types is reported through error() and false is returned.
     */
    final bit ensureToken (TokenType type)
    {
        if (type == token.type)
            return true;

        error (new ExpectTokenError (token.mark, token.typeNameBase (type), token.typeName ()));
        return false;
    }
}
